# Replication code for Taylor C. Boas and Amy Erica Smith, “Looks Like Me, Thinks Like Me: Descriptive Representation and Opinion Congruence in Brazil.” Latin American Research Review 54, 2 (2019).

# Analysis conducted in R 3.6.0 on MacOS 10.13.6

# NOTE: This file replicates Appendix Table 1 as well as a result conveyed textually in the Appendix. We recommend running R replication files in the following order; please see readme.txt for details.
# 	1_merge_lapop.R
# 	2_merge_latinobarometro.R
# 	3_recode_reshape.R
# 	4_difference_in_distributions.R
# 	5_regressions.R
# 	6_civil_society_meeting.R
# 	7_mass_descriptives.R
# 	8_elite_descriptives.R
# 	9_mean_differences.R
#	10_difference_in_distributions_ks.R
#	11_elite_sample_simulation.R
#	12_converts_vs_lifelong.R

# Set working directory as appropriate
# setwd('~/Dropbox/brazil_leg_surveys/replication/')

# Clean desktop and load packages. Please make sure all necessary packages are installed.

library(Hmisc)
library(weights)

rm(list=ls(all=T))

load('lapop_brazil07.RData')
load('lapop_brazil08.RData')
load('lapop_brazil10.RData')
load('lapop_brazil12.RData')

lapop07$party<-gsub('(.*) \\-.*','\\1', lapop07$SEP2)
lapop07$party[lapop07$party=='NS']<-NA
lapop08$party<-as.character(lapop08$vb11)
lapop08$party[lapop08$party %in% c('Nenhum','Outro')]<-NA
lapop10$party<-toupper(as.character(lapop10$vb11))
lapop10$party[lapop10$party == 'OUTRO']<-NA
lapop10$party[lapop10$party == 'DEMOCRATAS']<-'DEM'
lapop10$party[lapop10$party == 'PCDOB']<-'PC do B'
lapop10$party[lapop10$party %in% c('NS','NR','NA')]<-NA
lapop12$party<-toupper(as.character(lapop12$vb11))
lapop12$party<-gsub('\\(.*\\) ','', lapop12$party)
lapop12$party[lapop12$party == 'OUTRO']<-NA
lapop12$party[lapop12$party == 'DEMOCRATAS']<-'DEM'
lapop12$party[lapop12$party == 'PCDOB']<-'PC do B'

names(lapop07)[which(names(lapop07)=='CP13')]<-'cp13'
lapop07$cp13<-as.vector(unclass(lapop07$cp13))
lapop08$cp13<-as.vector(unclass(lapop08$cp13))
lapop10$cp13<-as.vector(unclass(lapop10$cp13))
lapop12$cp13<-as.vector(unclass(lapop12$cp13))

names(lapop07)[which(names(lapop07)=='CP8')]<-'cp8'
lapop07$cp8<-as.vector(unclass(lapop07$cp8))
lapop08$cp8<-as.vector(unclass(lapop08$cp8))
lapop10$cp8<-as.vector(unclass(lapop10$cp8))
lapop12$cp8<-as.vector(unclass(lapop12$cp8))

names(lapop07)[which(names(lapop07)=='CP9')]<-'cp9'
lapop07$cp9<-as.vector(unclass(lapop07$cp9))
lapop08$cp9<-as.vector(unclass(lapop08$cp9))
lapop10$cp9<-as.vector(unclass(lapop10$cp9))
lapop12$cp9<-as.vector(unclass(lapop12$cp9))

names(lapop07)[which(names(lapop07)=='CP20')]<-'cp20'
lapop07$cp20<-NA
lapop08$cp20<-as.vector(unclass(lapop08$cp20))
lapop10$cp20<-as.vector(unclass(lapop10$cp20))
lapop12$cp20<-as.vector(unclass(lapop12$cp20))

names(lapop07)[which(names(lapop07)=='CP10')]<-'cp10'
lapop07$cp10<-as.vector(unclass(lapop07$cp10))
lapop08$cp10<-as.vector(unclass(lapop08$cp10))
lapop10$cp10<-NA
lapop12$cp10<-NA

# Weights to deal with over/undersampling by region in BEPS 2010 and AB 2012

lapop07$pweight<-lapop08$pweight<-1

region.pop<-read.csv('~/Dropbox/Other Documents/Brazil data/region2010.csv',as.is=T)
region.pop<-apply(region.pop[2:6],2,sum)
region.pct.pop<-data.frame(region=c('North','Northeast','Southeast','South','Center-West'),pop_pct=100*prop.table(region.pop))

lapop10.uf<-data.frame(100*prop.table(table(lapop10$uf)))
names(lapop10.uf)<-c('uf','pct')
lapop10.uf$region<-factor(1*(lapop10.uf$uf %in% c('PR','RS','SC'))+2*(lapop10.uf$uf %in% c('SP','RJ','ES','MG'))+3*(lapop10.uf$uf %in% c('AM','RR','AP','PA','TO','RO','AC'))+4*(lapop10.uf$uf %in% c('MA','PI','CE','RN','PE','PB','SE','AL','BA'))+5*(lapop10.uf$uf %in% c('MT','MS','GO','DF')),labels=c('South','Southeast','North','Northeast','Center-West'))
lapop10.region<-with(lapop10.uf,aggregate(list(pct=pct),list(region=region),sum))
lapop10.region<-merge(lapop10.region,region.pct.pop)
lapop10.region$pweight<-lapop10.region$pop_pct/lapop10.region$pct
lapop10.uf<-merge(lapop10.uf,lapop10.region[,c('region','pweight')])
lapop10<-merge(lapop10,lapop10.uf[,c('uf','pweight')])

lapop12.uf<-data.frame(100*prop.table(table(lapop12$uf)))
names(lapop12.uf)<-c('uf','pct')
lapop12.uf$region<-factor(1*(lapop12.uf$uf %in% c('PR','RS','SC'))+2*(lapop12.uf$uf %in% c('SP','RJ','ES','MG'))+3*(lapop12.uf$uf %in% c('AM','RR','AP','PA','TO','RO','AC'))+4*(lapop12.uf$uf %in% c('MA','PI','CE','RN','PE','PB','SE','AL','BA'))+5*(lapop12.uf$uf %in% c('MT','MS','GO','DF')),labels=c('South','Southeast','North','Northeast','Center-West'))
lapop12.region<-with(lapop12.uf,aggregate(list(pct=pct),list(region=region),sum))
lapop12.region<-merge(lapop12.region,region.pct.pop)
lapop12.region$pweight<-lapop12.region$pop_pct/lapop12.region$pct
lapop12.uf<-merge(lapop12.uf,lapop12.region[,c('region','pweight')])
lapop12<-merge(lapop12,lapop12.uf[,c('uf','pweight')])

lapop<-rbind(lapop07[,c('party','region','cp8','cp9','cp10','cp13','cp20','pweight')], lapop08[,c('party','region','cp8','cp9','cp10','cp13','cp20','pweight')], lapop10[,c('party','region','cp8','cp9','cp10','cp13','cp20','pweight')], lapop12[,c('party','region','cp8','cp9','cp10','cp13','cp20','pweight')])

# Party meetings

party<-round(100*prop.table(rbind(PMDB=with(lapop[lapop$party=='PMDB',],sapply(1:4,function(x) sum(pweight[cp13==x],na.rm=T))),PSDB=with(lapop[lapop$party=='PSDB',],sapply(1:4,function(x) sum(pweight[cp13==x],na.rm=T))),PT=with(lapop[lapop$party=='PT',],sapply(1:4,function(x) sum(pweight[cp13==x],na.rm=T)))),1),1)

# Community associations
community<-round(100*prop.table(rbind(PMDB=with(lapop[lapop$party=='PMDB',],sapply(1:4,function(x) sum(pweight[cp8==x],na.rm=T))),PSDB=with(lapop[lapop$party=='PSDB',],sapply(1:4,function(x) sum(pweight[cp8==x],na.rm=T))),PT=with(lapop[lapop$party=='PT',],sapply(1:4,function(x) sum(pweight[cp8==x],na.rm=T)))),1),1)

# Professional associations
professional<-round(100*prop.table(rbind(PMDB=with(lapop[lapop$party=='PMDB',],sapply(1:4,function(x) sum(pweight[cp9==x],na.rm=T))),PSDB=with(lapop[lapop$party=='PSDB',],sapply(1:4,function(x) sum(pweight[cp9==x],na.rm=T))),PT=with(lapop[lapop$party=='PT',],sapply(1:4,function(x) sum(pweight[cp9==x],na.rm=T)))),1),1)

# Unions
union<-round(100*prop.table(rbind(PMDB=with(lapop[lapop$party=='PMDB',],sapply(1:4,function(x) sum(pweight[cp10==x],na.rm=T))),PSDB=with(lapop[lapop$party=='PSDB',],sapply(1:4,function(x) sum(pweight[cp10==x],na.rm=T))),PT=with(lapop[lapop$party=='PT',],sapply(1:4,function(x) sum(pweight[cp10==x],na.rm=T)))),1),1)

# Women's organizations
women<-round(100*prop.table(rbind(PMDB=with(lapop[lapop$party=='PMDB',],sapply(1:4,function(x) sum(pweight[cp20==x],na.rm=T))),PSDB=with(lapop[lapop$party=='PSDB',],sapply(1:4,function(x) sum(pweight[cp20==x],na.rm=T))),PT=with(lapop[lapop$party=='PT',],sapply(1:4,function(x) sum(pweight[cp20==x],na.rm=T)))),1),1)

# ===============================================================
# Appendix Table 1: Civil Society Participation by Party Sympathy
# ===============================================================

partic.table<-rbind(community,professional,union,women,party)
colnames(partic.table)<-c('Once\na week','1--2 times\na month','1--2 times\na year','\nNever')

bottom.note="NOTE: Entries are row percentages based on the combined AmericasBarometer Brazil surveys from 2007, 2008, 2010, and 2012, except for labor unions (not asked in 2010-2012) and women's groups (not asked in 2007). Population weights are applied in 2010 and 2012."

partic.table.latex<-latex(partic.table,file='partic_table.tex',collabel.just=rep('c',4),col.just = rep('c',4),extracolsize='normalsize',rowlabel = 'Organization Type', rgroup=c('Community Association','Professional Association','Labor Union',"Women's Group",'Political Party'),n.rgroup=rep(3,5), cgroup='Meeting Attendance Frequency',n.cgroup=4,caption = 'Civil Society Participation by Party Sympathy', insert.bottom=bottom.note, booktabs = F, ctable = T, where = "htp")

# ==========================
# Results Conveyed Textually 
# ==========================

# Appendix: "The only significant difference between the PT and the PMDB is in the percentage who never attend women’s groups."

wtd.t.test(unclass(lapop$cp13[lapop$party=='PT'])==4, unclass(lapop$cp13[lapop$party=='PSDB'])==4,weight=lapop$pweight[lapop$party=='PT'],weighty=lapop$pweight[lapop$party=='PSDB'],samedata=F)$coefficients['p.value']
wtd.t.test(unclass(lapop$cp13[lapop$party=='PT'])==4, unclass(lapop$cp13[lapop$party=='PMDB'])==4,weight=lapop$pweight[lapop$party=='PT'],weighty=lapop$pweight[lapop$party=='PMDB'],samedata=F)$coefficients['p.value']

wtd.t.test(unclass(lapop$cp8[lapop$party=='PT'])==4, unclass(lapop$cp8[lapop$party=='PSDB'])==4,weight=lapop$pweight[lapop$party=='PT'],weighty=lapop$pweight[lapop$party=='PSDB'],samedata=F)$coefficients['p.value']
wtd.t.test(unclass(lapop$cp8[lapop$party=='PT'])==4, unclass(lapop$cp8[lapop$party=='PMDB'])==4,weight=lapop$pweight[lapop$party=='PT'],weighty=lapop$pweight[lapop$party=='PMDB'],samedata=F)$coefficients['p.value']

wtd.t.test(unclass(lapop$cp9[lapop$party=='PT'])==4, unclass(lapop$cp9[lapop$party=='PSDB'])==4,weight=lapop$pweight[lapop$party=='PT'],weighty=lapop$pweight[lapop$party=='PSDB'],samedata=F)$coefficients['p.value']
wtd.t.test(unclass(lapop$cp9[lapop$party=='PT'])==4, unclass(lapop$cp9[lapop$party=='PMDB'])==4,weight=lapop$pweight[lapop$party=='PT'],weighty=lapop$pweight[lapop$party=='PMDB'],samedata=F)$coefficients['p.value']

wtd.t.test(unclass(lapop$cp10[lapop$party=='PT'])==4, unclass(lapop$cp10[lapop$party=='PSDB'])==4,weight=lapop$pweight[lapop$party=='PT'],weighty=lapop$pweight[lapop$party=='PSDB'],samedata=F)$coefficients['p.value']
wtd.t.test(unclass(lapop$cp10[lapop$party=='PT'])==4, unclass(lapop$cp10[lapop$party=='PMDB'])==4,weight=lapop$pweight[lapop$party=='PT'],weighty=lapop$pweight[lapop$party=='PMDB'],samedata=F)$coefficients['p.value']

wtd.t.test(unclass(lapop$cp20[lapop$party=='PT'])==4, unclass(lapop$cp20[lapop$party=='PSDB'])==4,weight=lapop$pweight[lapop$party=='PT'],weighty=lapop$pweight[lapop$party=='PSDB'],samedata=F)$coefficients['p.value']
wtd.t.test(unclass(lapop$cp20[lapop$party=='PT'])==4, unclass(lapop$cp20[lapop$party=='PMDB'])==4,weight=lapop$pweight[lapop$party=='PT'],weighty=lapop$pweight[lapop$party=='PMDB'],samedata=F)$coefficients['p.value']

